knitr::opts_chunk$set(echo=F, warning=F, message = F)
library(tidyverse) library(recipes) library(Data.Quality.Reports) library(lubridate) MISSING_TYPES <- c(NA, "", " ", "NA", "nan", " / / ", "NULL", "null", "Null")
``` {r results="asis"}
data <- params$path %>% readRDS() missing <- c(MISSING_TYPES, " / / ", "NULL", "null", "Null")
date.feature <- data[[params$date_variable]] location.feature <- data[[params$location_variable]]
missing.data <- data %>% select(-c(params$location_variable, params$date_variable)) %>%
is.missing(missing)
missing.rates <- missing.data %>% apply(2, mean)
for (i in names(missing.rates)){
cat('\n')
cat(glue("{i}: {round(100*missing.rates[i], 3)}%\n"))
cat("\n")
}
## Summary of predictability of missing patterns based on location or date Predictability is measured via the ROC, which ranges from 0.5 to 1, with 1 implying perfect predictability. Data was divided into training and validation sets - so ROC represents predicatbility on data unseen during training. Value of > 0.8 may be cuase for concern (see the figures that follow for interpretation of models). ``` {r} missing.vars <- missing.rates[(missing.rates < 0.2) & (missing.rates > 0.001)] %>% names() date.missing <- lapply( missing.vars, function(i) { target <- data[[i]] %>% is.missing(missing) result <- feature.fit(target, date.feature) } ) location.missing <- lapply( missing.vars, function(i) { target <- data[[i]] %>% is.missing(missing) feature.fit(target, location.feature) } ) l <- length(missing.vars) if ( l > 0){ frm <- data.frame( Var = missing.vars, DateROC = sapply(date.missing, function(i) i$AUC)%>% round(3), LocROC = sapply(location.missing, function(i) i$AUC)%>% round(3) ) } else { frm <- data.frame( Var = NA, DateROC = NA, LocROC = NA ) } frm %>% knitr::kable(caption="ROC-AUC values for location and date models.")
to.group <- function(values, result) { values <- values %>% str_replace_all("`", "") factor(result$naming_vector[values], levels = result$naming_vector) } generate_plot <- function(result, title=NA){ plotdata <- varImp(result$model) %>% as.data.frame() %>% mutate(Group = to.group(rownames(.), result)) ncol <- (1 + nrow(plotdata) %/% 16) plotdata %>% ggplot(aes(x=Group, y=Score, color=Group)) + geom_point() + guides(color=guide_legend(ncol=ncol,byrow=FALSE)) + theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank()) + labs(title = title) }
r params$date_variable
on MissingFigures to summarize Date on missing bias.
``` {r, fig.width=8} l <- length(missing.vars) if (l > 0){ plt <- purrr::map( 1:l, function(i) generate_plot(date.missing[[i]], missing.vars[i]) %>% print() ); }
## `r params$location_variable` on Missing Figures to summarize Location on missing bias. ``` {r, fig.width=12} if (l > 0){ plt <- purrr::map( 1:l, function(i) generate_plot(location.missing[[i]], missing.vars[i]) %>% print() ); }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.